# install.packages("lpSolveAPI")
# install.packages("readstata13")
# install.packages("dplyr")

# Clear all
rm(list = ls()) 
set.seed(47474747)
#Load Packages
library(lpSolveAPI)
library(readstata13)
library(dplyr)
library(ggplot2)
library(AER)
library(sandwich)
library(lmtest)
library(boot)
library(reshape2)

# Set Path <-- Direct to replication data folder
setwd("/Users/vedantvohra/Library/CloudStorage/Dropbox/ACR Bounds/Replication Kit")

#Prepare Data 
original_data <- read.dta13("Data/ms_blel_jpal_wide.dta") 
test_data <- read.dta13("Data/ms_ei.dta") 
original_data <- left_join(original_data,test_data, by = "st_id") %>% 
                  mutate(att_tot = if_else(treat==0,0,att_tot)) %>% 
                  mutate(week_att = if_else(treat == 0,0,ceiling(att_tot/7))) %>% 
                  filter(!is.na(h_theta_mle2),!is.na(h_theta_mle1),!is.na(m_theta_mle2),!is.na(m_theta_mle1),!is.na(treat)) %>% 
                  select(st_id,week_att,h_theta_mle1,h_theta_mle2,treat,strata)


# Load Function "cce"
source("Functions/cce.R")

wrapper <- function(data, index, subsampleSize){
  main_data <- data[sample(1:nrow(data),size=subsampleSize,replace=TRUE),]
  
  iv <- ivreg(h_theta_mle2 ~ week_att+ h_theta_mle1 + factor(strata) | treat + h_theta_mle1 + factor(strata),
              data = main_data)
  
  iv_res <- coeftest(iv, vcov=vcovHC(iv, type="HC0")) 
  
  hindi_acr_weeks <- iv_res[2,1]
  
  weight_vars <- list()
  weights <- c()
  
  for (i in 1:12) {
    nam <- paste("week_",i,"_or_more",sep = "")
    weight_vars[[nam]] <- assign(nam, if_else(main_data$week_att >= i, 1, 0))
    model <- lm(weight_vars[[i]] ~ main_data$treat + factor(main_data$strata))
    weights <- c(weights,coeftest(model, vcov=vcovHC(model, type="HC0"))[2,1])
  }
  weights<-weights/sum(weights)
  
  # LP Result assuming concavity
  results_conc_hindi_weeks <- c(cce(1,weights,hindi_acr_weeks)$`Lower Bound`,cce(1,weights,hindi_acr_weeks)$`Upper Bound`)
  
  return(results_conc_hindi_weeks)
}

N <- nrow(original_data) ##sample size of dataset

q <- 0.95 ##inputs for determining candidate values for m 
k <- 500 

m.candidates <- unique(ceiling(N * q^(0:k)))
m.candidates <- m.candidates[m.candidates>=40] ##these are the candidate values for m

#This is the number of replicated datasets to be generated for each candidate value m.
#Let it be a large number.

bootrep <- 5000 

#This is the number of replicated datasets to be generated
#for the value m that is selected. 

bootrep.chosenM <- 10000

##STEP 1: Selecting m
numCandidates <- length(m.candidates)

matLB <- matrix(NA, nrow = numCandidates, ncol = bootrep) 
matUB <- matrix(NA, nrow = numCandidates, ncol = bootrep)

for (l in 1:numCandidates){
  boot.obj <- boot(data = original_data, statistic= wrapper, R=bootrep, subsampleSize = m.candidates[l])
  matLB[l,] <- t(sort(boot.obj$t[,1], na.last = TRUE))
  matUB[l,] <- t(sort(boot.obj$t[,2], na.last = TRUE))
}

##This loop gets the lower and upper bound estimates of the 5,000 bootstrap replicated datasets. The
##results are stored in matLB and matUB, respectively.

##For each candidate value, there is a row in matLB (matUB) with the lower (upper) bound estimates for that candidate value.
##The lower (upper) bound estimates are ordered from smallest to largest, with any NA's at the end.

##Now we select the value m for the lower bound
##Label each row with the candidate value to which it corresponds.
matLB <- cbind(m.candidates, matLB) 

##get rid of candidate m's with any NA's (m was too small, nT or nC = 0) 
matLB <- matLB[complete.cases(matLB),] 

##these are the candidates remaining after excluding those with NA's
m.candidatesLB <- matLB[,1] 

##Take off the labels. We don't want these when choosing m.
matLB <- matLB[,-1] 

##Take the difference between adjacent rows (row 1 - row 2, row2 - row3, row3 - row4, ...)
temp1 <- matLB[1:(nrow(matLB)-1),]-matLB[2:nrow(matLB),] 

##Take the absolute value (|row1 - row2|, |row2 - row3|, |row3 - row4|, ...)
temp1 <- abs(temp1) 

##Find the maximum (max{|row1 - row2|}, max{|row2 - row3|}, max{|row3 - row4}, ...)
temp1 <- apply(temp1, 1, max) 

m.chosenLB <- m.candidatesLB[min(which(temp1==min(temp1)))] 
##Choose the value m with the smallest max absolute difference. 
##If multiple m attain the smallest max absolute difference, choose the largest one.
##m.chosenLB is the value m selected for lower bound

##Separately, but using the same procedure, we select the value m for the upper bound
matUB <- cbind(m.candidates, matUB)
matUB <- matUB[complete.cases(matUB),] 
m.candidatesUB <- matUB[,1]
matUB <- matUB[,-1]
temp1 <- matUB[1:(nrow(matUB)-1),]-matUB[2:nrow(matUB),]
temp1 <- abs(temp1)
temp1 <- apply(temp1, 1, max)
m.chosenUB <- m.candidatesUB[min(which(temp1==min(temp1)))] 

m.chosenLB <- 357
m.chosenUB <- 203

##STEP 2: Run bootstrap using selected values of m##
boot.obj <- boot(data = original_data, statistic= wrapper, R=bootrep.chosenM, subsampleSize = m.chosenLB)
LB.CI_mn <- c(quantile(boot.obj$t[,1], probs = c(.025,0.975)), m.chosenLB)

boot.obj <- boot(data = original_data, statistic= wrapper, R=bootrep.chosenM, subsampleSize = m.chosenUB)
UB.CI_mn <- c(quantile(boot.obj$t[,2], probs = c(.025,0.975)), m.chosenUB)

#Final Results
iv <- ivreg(h_theta_mle2 ~ week_att+ h_theta_mle1 + factor(strata) | treat + h_theta_mle1 + factor(strata),
            data = original_data)

iv_res <- coeftest(iv, vcov=vcovHC(iv, type="HC0")) 

hindi_acr_weeks <- iv_res[2,1]
weight_vars <- list()
weights <- c()

for (i in 1:12) {
  nam <- paste("week_",i,"_or_more",sep = "")
  weight_vars[[nam]] <- assign(nam, if_else(original_data$week_att >= i, 1, 0))
  model <- lm(weight_vars[[i]] ~ original_data$treat + factor(original_data$strata))
  weights <- c(weights,coeftest(model, vcov=vcovHC(model, type="HC0"))[2,1])
}
weights<-weights/sum(weights)

# LP Result assuming concavity
results_conc_hindi_weeks <- c(cce(1,weights,hindi_acr_weeks)$`Lower Bound`,cce(1,weights,hindi_acr_weeks)$`Upper Bound`)


results <- round(c(hindi_acr_weeks,results_conc_hindi_weeks,  LB.CI_mn[1],  UB.CI_mn[2]),3)
labels <- c("ACR", "Lower Bound", "Upper Bound", "Lower CI", "Upper CI")
names(results) <- labels
